This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Hypothesis: Preseason rankings affect March Madness results
library(readxl)
library(readr)
# Show the session's current working directory (informational only).
getwd()
## [1] "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file"
# Read the rankings CSV through an explicit path instead of calling setwd(),
# so this chunk leaves the session's working directory untouched.
data_dir <- "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file"
CFB_Data <- read.csv(file.path(data_dir, "CBB Preseason Rankings.csv"))
# Number of columns in the data frame.
length(CFB_Data)
## [1] 4
# First rows, then the (rows, columns) dimensions.
head(CFB_Data)
dim(CFB_Data)
## [1] 250   4
# Per-column summary statistics.
summary(CFB_Data)
##       Year           Rank        Team               Round      
##  Min.   :2009   Min.   : 1   Length:250         Min.   :0.000  
##  1st Qu.:2011   1st Qu.: 7   Class :character   1st Qu.:1.000  
##  Median :2014   Median :13   Mode  :character   Median :2.000  
##  Mean   :2014   Mean   :13                      Mean   :2.436  
##  3rd Qu.:2016   3rd Qu.:19                      3rd Qu.:3.000  
##  Max.   :2018   Max.   :25                      Max.   :7.000
# Compact listing of each column's name, type and leading values.
str(CFB_Data)
## 'data.frame':    250 obs. of  4 variables:
##  $ Year : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##  $ Rank : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Team : chr  "DUKEDuke" "UVAVirginia" "UNCNorth Carolina" "GONZGonzaga" ...
##  $ Round: int  4 7 3 4 5 3 4 3 6 3 ...
# Count of missing values in every column.
colSums(is.na(CFB_Data))
##  Year  Rank  Team Round 
##     0     0     0     0
# Column names.
colnames(CFB_Data)
## [1] "Year"  "Rank"  "Team"  "Round"
# Print the full data frame.
print(CFB_Data)
# Keep only the rows whose preseason rank is exactly 1.
rank1_data <- CFB_Data[which(CFB_Data$Rank == 1), ]

# Tally how many of those rows belong to each team.
rank1_counts <- table(rank1_data[["Team"]])

# Display the tally.
print(rank1_counts)
## 
##          DUKEDuke        INDIndiana          KUKansas        UKKentucky 
##                 3                 1                 1                 2 
## UNCNorth Carolina     VILLVillanova 
##                 2                 1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Restrict the data to rows with a preseason rank of 5 or better.
top5_data <- CFB_Data[which(CFB_Data$Rank <= 5), ]

# Tabulate appearances per team; the columns are named "Team" and "Count"
# directly while the table is converted to a data frame.
top5_counts <- as.data.frame(
  table(Team = top5_data$Team),
  responseName = "Count"
)

# Bar chart of the counts: teams ordered by ascending count, x-axis labels
# rotated by 90 degrees.
ggplot(data = top5_counts, mapping = aes(x = reorder(Team, Count), y = Count)) +
  geom_col(fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Number of Times Teams Ranked in Top 5") +
  xlab("Team") +
  ylab("Number of Times in Top 5")

library(gridExtra)
# Restrict the data to rows with a preseason rank of 5 or better.
top5_data <- subset(CFB_Data, Rank <= 5)

# Count top-5 appearances for every Year x Team combination. table() also
# yields zero counts for combinations that never occur, and as.data.frame()
# returns both Year and Team as factors.
top5_yearly_counts <- as.data.frame(
  table(Year = top5_data$Year, Team = top5_data$Team),
  responseName = "Count"
)

# Teams to plot (one panel each) and the shared upper bound of the y-axis.
teams <- unique(top5_yearly_counts$Team)
max_count <- max(top5_yearly_counts$Count)

# Build one line-and-point chart per team.
plots <- lapply(teams, function(team) {
  team_data <- subset(top5_yearly_counts, Team == team)
  # Year is a factor here, so ggplot2 would otherwise put every year in its
  # own group and geom_line() would have nothing to connect. group = 1 places
  # all years in a single group so the line is actually drawn.
  ggplot(team_data, aes(x = Year, y = Count, group = 1)) +
    geom_line() +
    geom_point() +
    scale_y_continuous(limits = c(0, max_count), breaks = seq.int(0, max_count)) +
    labs(title = paste("Top 5 Rankings Over Years:", team))
})

# Choose a near-square grid large enough to hold every panel.
num_of_plots <- length(plots)
num_of_cols <- ceiling(sqrt(num_of_plots))
num_of_rows <- ceiling(num_of_plots / num_of_cols)

# Arrange the panels into that grid.
do.call(grid.arrange, c(plots, ncol = num_of_cols, nrow = num_of_rows))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Pearson (linear) correlation between preseason rank and round reached.
correlation <- with(CFB_Data, cor(Rank, Round, method = "pearson"))

# Spearman (rank-based) correlation of the same two columns, kept as an
# alternative for data that is not normally distributed or not linearly
# related.
correlation_spearman <- with(CFB_Data, cor(Rank, Round, method = "spearman"))

# Print both coefficients.
print(correlation)
## [1] -0.4589596
print(correlation_spearman)
## [1] -0.4608207
library(ggplot2)

# Scatter plot of round reached against preseason rank, overlaid with a
# fitted linear-model line (confidence band switched off).
ggplot(data = CFB_Data, mapping = aes(x = Rank, y = Round)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Correlation between Preseason Rankings and Tournament Performance") +
  xlab("Preseason Ranking") +
  ylab("Tournament Round Reached")
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)
# Highlighting thresholds: a point is flagged when its Rank is above
# lower_rank_threshold and its Round is above successful_run_threshold.
lower_rank_threshold <- 15
successful_run_threshold <- 4

# Scatter plot with flagged points in red and all others in black, plus a
# blue linear-model line; the colour legend is hidden.
ggplot(data = CFB_Data, mapping = aes(x = Rank, y = Round)) +
  geom_point(
    mapping = aes(
      color = (Rank > lower_rank_threshold & Round > successful_run_threshold)
    )
  ) +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  ggtitle("Highlighting Lower-Ranked Teams with Successful Tournament Runs") +
  xlab("Preseason Ranking") +
  ylab("Tournament Round Reached") +
  theme_minimal() +
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

# Highlighting thresholds: a row counts as an underdog run when its Rank is
# above lower_rank_threshold and its Round is above successful_run_threshold.
lower_rank_threshold <- 15
successful_run_threshold <- 4

# Rows that satisfy both threshold conditions.
underdogs <- CFB_Data[
  which(
    CFB_Data$Rank > lower_rank_threshold &
      CFB_Data$Round > successful_run_threshold
  ),
]

# Scatter plot (underdogs red, others black), a blue linear-model line, and
# a red "Team, Year" label next to every underdog point; legend hidden.
p <- ggplot(data = CFB_Data, mapping = aes(x = Rank, y = Round)) +
  geom_point(
    mapping = aes(
      color = (Rank > lower_rank_threshold & Round > successful_run_threshold)
    )
  ) +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_text(
    data = underdogs,
    mapping = aes(label = paste(Team, Year, sep = ", ")),
    vjust = -1,
    hjust = 1,
    color = "red"
  ) +
  ggtitle("Highlighting Lower-Ranked Teams with Successful Tournament Runs") +
  xlab("Preseason Ranking") +
  ylab("Tournament Round Reached") +
  theme_minimal() +
  theme(legend.position = "none")

# Render the finished plot.
print(p)
## `geom_smooth()` using formula = 'y ~ x'

# Load necessary libraries
library(ggplot2)
library(readr)

# Load the per-year performance table from disk.
annual_performance <- read_csv(
  "/Users/shaunmckellarjr/Desktop/College Basketball Project/annual_performance.csv"
)
## Rows: 10 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Year, Round
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Time series of Round by Year: a connecting line with a point marking each
# year, drawn with the minimal theme.
ggplot(data = annual_performance, mapping = aes(x = Year, y = Round)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Average Tournament Round Reached by Top 5 Preseason Ranked Teams") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")

# Load the necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)

# Re-read the rankings CSV with readr.
CFB_Data <- read_csv(
  "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv"
)
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the "DUKEDuke" rows and average Round within each Year.
duke_rows <- filter(CFB_Data, Team == "DUKEDuke")
duke_by_year <- group_by(duke_rows, Year)
duke_data <- summarize(duke_by_year, AverageRound = mean(Round))

# Line-and-point chart of the yearly average for "DUKEDuke".
ggplot(data = duke_data, mapping = aes(x = Year, y = AverageRound)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Yearly Tournament Performance Trend for Duke") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")

# Load the necessary libraries
library(ggplot2)
library(dplyr)
library(readr)

# Re-read the rankings CSV with readr.
CFB_Data <- read_csv(
  "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv"
)
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Average Round for every (Year, Rank) pair, returned ungrouped.
rank_year_groups <- group_by(CFB_Data, Year, Rank)
rank_performance_over_time <- summarize(
  rank_year_groups,
  AverageRound = mean(Round),
  .groups = "drop"
)

# One coloured line (with points) per year, plotted across preseason ranks.
ggplot(
  data = rank_performance_over_time,
  mapping = aes(x = Rank, y = AverageRound, group = Year, color = factor(Year))
) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Tournament Outcome Trends Based on Preseason Ranks Over Years") +
  xlab("Preseason Rank") +
  ylab("Average Tournament Round Reached") +
  labs(color = "Year")

library(ggplot2)
library(dplyr)
library(readr)

# Re-read the rankings CSV with readr.
CFB_Data <- read_csv(
  "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv"
)
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the rank-1 rows only and average Round within each Year (ungrouped
# result).
rank1_rows <- filter(CFB_Data, Rank == 1)
rank1_by_year <- group_by(rank1_rows, Year)
rank1_data <- summarize(
  rank1_by_year,
  AverageRound = mean(Round),
  .groups = "drop"
)

# Line-and-point chart of the yearly average for rank-1 rows.
ggplot(data = rank1_data, mapping = aes(x = Year, y = AverageRound)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Yearly Tournament Performance Trend for Rank 1 Teams") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")

# Median and interquartile range of Round for each preseason rank, returned
# as an ungrouped table.
rank_groups <- group_by(CFB_Data, Rank)
rank_summaries <- summarize(
  rank_groups,
  MedianRound = median(Round),
  IQR = IQR(Round),
  .groups = "drop"
)

# Show the per-rank summary table.
print(rank_summaries)
## # A tibble: 25 × 3
##     Rank MedianRound   IQR
##    <dbl>       <dbl> <dbl>
##  1     1         4    1.75
##  2     2         4.5  4.25
##  3     3         3    2.5 
##  4     4         4    1.75
##  5     5         3    2.5 
##  6     6         3    3   
##  7     7         3    1.75
##  8     8         3    1.75
##  9     9         4.5  2.75
## 10    10         2.5  2.75
## # ℹ 15 more rows
# Load the necessary libraries
library(ggplot2)
library(dplyr)
library(readr)

# Re-read the rankings CSV with readr.
CFB_Data <- read_csv(
  "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv"
)
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce Year to numeric so it can serve as the regression predictor.
CFB_Data$Year <- as.numeric(CFB_Data$Year)

# Average Round for every (Year, Rank) pair, returned ungrouped.
rank_year_groups <- group_by(CFB_Data, Year, Rank)
rank_performance <- summarize(
  rank_year_groups,
  AverageRound = mean(Round),
  .groups = "drop"
)

# Fit AverageRound ~ Year separately for each rank, in the order the ranks
# first appear in CFB_Data, and name each model after its rank.
rank_values <- unique(CFB_Data$Rank)
model_list <- lapply(rank_values, function(rank_value) {
  model_data <- filter(rank_performance, Rank == rank_value)
  lm(AverageRound ~ Year, data = model_data)
})
names(model_list) <- as.character(rank_values)

# Build a summary object for every fitted model.
model_summaries <- lapply(model_list, summary)

# Print all of the summaries, one list element per rank.
print(model_summaries)
## $`1`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2485 -0.6500 -0.4000  0.8849  2.1697 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -276.7697   325.9606  -0.849    0.421
## Year           0.1394     0.1619   0.861    0.414
## 
## Residual standard error: 1.47 on 8 degrees of freedom
## Multiple R-squared:  0.08482,    Adjusted R-squared:  -0.02958 
## F-statistic: 0.7414 on 1 and 8 DF,  p-value: 0.4143
## 
## 
## $`2`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3939 -2.0379  0.0758  2.1288  2.5758 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.5151   534.0139  -0.106    0.918
## Year          0.0303     0.2652   0.114    0.912
## 
## Residual standard error: 2.409 on 8 degrees of freedom
## Multiple R-squared:  0.001629,   Adjusted R-squared:  -0.1232 
## F-statistic: 0.01305 on 1 and 8 DF,  p-value: 0.9118
## 
## 
## $`3`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6364 -1.0864 -0.1273  0.7591  3.1454 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -216.8545   433.6804  -0.500    0.631
## Year           0.1091     0.2154   0.506    0.626
## 
## Residual standard error: 1.956 on 8 degrees of freedom
## Multiple R-squared:  0.03107,    Adjusted R-squared:  -0.09005 
## F-statistic: 0.2565 on 1 and 8 DF,  p-value: 0.6262
## 
## 
## $`4`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3879 -0.9742 -0.1727  0.8061  3.3879 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -448.0121   402.3305  -1.114    0.298
## Year           0.2242     0.1998   1.122    0.294
## 
## Residual standard error: 1.815 on 8 degrees of freedom
## Multiple R-squared:  0.136,  Adjusted R-squared:  0.02802 
## F-statistic: 1.259 on 1 and 8 DF,  p-value: 0.2943
## 
## 
## $`5`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##   -1.8   -1.3   -0.4    0.7    2.8 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -399.0000   390.3066  -1.022    0.337
## Year           0.2000     0.1938   1.032    0.332
## 
## Residual standard error: 1.761 on 8 degrees of freedom
## Multiple R-squared:  0.1174, Adjusted R-squared:  0.007117 
## F-statistic: 1.065 on 1 and 8 DF,  p-value: 0.3324
## 
## 
## $`6`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3091 -0.3773 -0.1364  0.3106  2.2485 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1119.8788   292.7459  -3.825  0.00505 **
## Year            0.5576     0.1454   3.835  0.00498 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.321 on 8 degrees of freedom
## Multiple R-squared:  0.6477, Adjusted R-squared:  0.6037 
## F-statistic: 14.71 on 1 and 8 DF,  p-value: 0.004982
## 
## 
## $`7`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5515 -0.7212  0.1091  0.9318  1.8364 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.84848  309.79870   0.639    0.541
## Year         -0.09697    0.15386  -0.630    0.546
## 
## Residual standard error: 1.398 on 8 degrees of freedom
## Multiple R-squared:  0.0473, Adjusted R-squared:  -0.07178 
## F-statistic: 0.3972 on 1 and 8 DF,  p-value: 0.5461
## 
## 
## $`8`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7030 -0.7333  0.1030  0.4985  1.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 235.1576   260.0136   0.904    0.392
## Year         -0.1152     0.1291  -0.892    0.399
## 
## Residual standard error: 1.173 on 8 degrees of freedom
## Multiple R-squared:  0.09041,    Adjusted R-squared:  -0.02329 
## F-statistic: 0.7952 on 1 and 8 DF,  p-value: 0.3986
## 
## 
## $`9`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.08485 -1.24091 -0.00909  1.25455  2.76364 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 345.6848   440.7478   0.784    0.455
## Year         -0.1697     0.2189  -0.775    0.460
## 
## Residual standard error: 1.988 on 8 degrees of freedom
## Multiple R-squared:  0.06988,    Adjusted R-squared:  -0.04639 
## F-statistic: 0.601 on 1 and 8 DF,  p-value: 0.4605
## 
## 
## $`10`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2667 -1.1500  0.1333  1.3500  2.2667 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 539.5333   403.7160   1.336    0.218
## Year         -0.2667     0.2005  -1.330    0.220
## 
## Residual standard error: 1.821 on 8 degrees of freedom
## Multiple R-squared:  0.1811, Adjusted R-squared:  0.0787 
## F-statistic: 1.769 on 1 and 8 DF,  p-value: 0.2202
## 
## 
## $`11`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0000 -1.8833  0.2333  0.7500  4.2000 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.13333  534.64091   0.256    0.804
## Year         -0.06667    0.26553  -0.251    0.808
## 
## Residual standard error: 2.412 on 8 degrees of freedom
## Multiple R-squared:  0.007818,   Adjusted R-squared:  -0.1162 
## F-statistic: 0.06304 on 1 and 8 DF,  p-value: 0.8081
## 
## 
## $`12`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.76364 -0.46667 -0.00606  0.50909  2.04242 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -193.04848  257.85696  -0.749    0.475
## Year           0.09697    0.12806   0.757    0.471
## 
## Residual standard error: 1.163 on 8 degrees of freedom
## Multiple R-squared:  0.06688,    Adjusted R-squared:  -0.04976 
## F-statistic: 0.5733 on 1 and 8 DF,  p-value: 0.4706
## 
## 
## $`13`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6182 -1.3318 -0.6242  1.1621  3.3697 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  27.00606  387.05004   0.070    0.946
## Year         -0.01212    0.19223  -0.063    0.951
## 
## Residual standard error: 1.746 on 8 degrees of freedom
## Multiple R-squared:  0.0004968,  Adjusted R-squared:  -0.1244 
## F-statistic: 0.003976 on 1 and 8 DF,  p-value: 0.9513
## 
## 
## $`14`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0485 -0.9530 -0.4424  1.1954  2.9091 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83.52121  373.83957  -0.223    0.829
## Year          0.04242    0.18567   0.228    0.825
## 
## Residual standard error: 1.686 on 8 degrees of freedom
## Multiple R-squared:  0.006484,   Adjusted R-squared:  -0.1177 
## F-statistic: 0.05221 on 1 and 8 DF,  p-value: 0.825
## 
## 
## $`15`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9394 -0.6000 -0.1485  0.9803  1.6424 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -193.64848  267.21599  -0.725    0.489
## Year           0.09697    0.13271   0.731    0.486
## 
## Residual standard error: 1.205 on 8 degrees of freedom
## Multiple R-squared:  0.06256,    Adjusted R-squared:  -0.05462 
## F-statistic: 0.5339 on 1 and 8 DF,  p-value: 0.4858
## 
## 
## $`16`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8606  0.1273  0.2000  0.2545  1.3091 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  50.61212  242.22348   0.209    0.840
## Year         -0.02424    0.12030  -0.202    0.845
## 
## Residual standard error: 1.093 on 8 degrees of freedom
## Multiple R-squared:  0.005051,   Adjusted R-squared:  -0.1193 
## F-statistic: 0.04061 on 1 and 8 DF,  p-value: 0.8453
## 
## 
## $`17`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39394 -0.73636 -0.05152  0.79394  1.41818 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.9939   237.5679  -1.587    0.151
## Year           0.1879     0.1180   1.592    0.150
## 
## Residual standard error: 1.072 on 8 degrees of freedom
## Multiple R-squared:  0.2407, Adjusted R-squared:  0.1458 
## F-statistic: 2.536 on 1 and 8 DF,  p-value: 0.15
## 
## 
## $`18`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9273 -1.7636 -0.8455  1.9909  4.3636 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 149.03636  596.70259   0.250    0.809
## Year         -0.07273    0.29635  -0.245    0.812
## 
## Residual standard error: 2.692 on 8 degrees of freedom
## Multiple R-squared:  0.007472,   Adjusted R-squared:  -0.1166 
## F-statistic: 0.06023 on 1 and 8 DF,  p-value: 0.8123
## 
## 
## $`19`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0242 -0.2667 -0.1000  0.3697  2.2182 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -242.4606   284.6214  -0.852    0.419
## Year           0.1212     0.1414   0.857    0.416
## 
## Residual standard error: 1.284 on 8 degrees of freedom
## Multiple R-squared:  0.08418,    Adjusted R-squared:  -0.0303 
## F-statistic: 0.7353 on 1 and 8 DF,  p-value: 0.4161
## 
## 
## $`20`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9454 -0.4273 -0.2242  0.2273  2.9939 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.1939   332.4069  -1.132    0.291
## Year           0.1879     0.1651   1.138    0.288
## 
## Residual standard error: 1.499 on 8 degrees of freedom
## Multiple R-squared:  0.1393, Adjusted R-squared:  0.03175 
## F-statistic: 1.295 on 1 and 8 DF,  p-value: 0.288
## 
## 
## $`21`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1091 -0.9818 -0.5000  0.7500  2.0606 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  49.81212  292.74588   0.170    0.869
## Year         -0.02424    0.14539  -0.167    0.872
## 
## Residual standard error: 1.321 on 8 degrees of freedom
## Multiple R-squared:  0.003463,   Adjusted R-squared:  -0.1211 
## F-statistic: 0.0278 on 1 and 8 DF,  p-value: 0.8717
## 
## 
## $`22`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8182 -0.4394 -0.2121  0.3485  1.3939 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 428.60606  171.49509   2.499   0.0370 *
## Year         -0.21212    0.08517  -2.490   0.0375 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7736 on 8 degrees of freedom
## Multiple R-squared:  0.4367, Adjusted R-squared:  0.3663 
## F-statistic: 6.203 on 1 and 8 DF,  p-value: 0.03749
## 
## 
## $`23`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.21818 -0.40909 -0.10000  0.05909  1.49091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -474.81818  196.57886  -2.415   0.0422 *
## Year           0.23636    0.09763   2.421   0.0418 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8868 on 8 degrees of freedom
## Multiple R-squared:  0.4229, Adjusted R-squared:  0.3507 
## F-statistic: 5.861 on 1 and 8 DF,  p-value: 0.04178
## 
## 
## $`24`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.467 -0.950 -0.200  0.850  1.667 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.53333  268.46694   0.505    0.627
## Year         -0.06667    0.13333  -0.500    0.631
## 
## Residual standard error: 1.211 on 8 degrees of freedom
## Multiple R-squared:  0.0303, Adjusted R-squared:  -0.09091 
## F-statistic:  0.25 on 1 and 8 DF,  p-value: 0.6305
## 
## 
## $`25`
## 
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1091 -0.7000 -0.3818  0.8909  1.3455 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -181.34545  213.46607  -0.850    0.420
## Year           0.09091    0.10602   0.857    0.416
## 
## Residual standard error: 0.963 on 8 degrees of freedom
## Multiple R-squared:  0.08418,    Adjusted R-squared:  -0.0303 
## F-statistic: 0.7353 on 1 and 8 DF,  p-value: 0.4161
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive version of the underdog scatter plot.
#
# The hover `text` aesthetic is mapped on the point layer only. ggplot2 groups
# each layer's data by every discrete mapped variable, so mapping `text`
# (a "Team, Year" string per row) at the plot level would split geom_smooth()
# into groups too small to fit, and no trend line would be drawn.
# ggplot2 itself does not draw `text` and may report it as an unknown
# aesthetic on this layer; ggplotly() picks it up for the tooltip.
p <- ggplot(CFB_Data, aes(x = Rank, y = Round)) +
  geom_point(aes(
    color = (Rank > lower_rank_threshold & Round > successful_run_threshold),
    text = paste(Team, Year, sep = ", ")
  )) +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(x = "Preseason Ranking", y = "Tournament Round Reached",
       title = "Highlighting Lower-Ranked Teams with Successful Tournament Runs") +
  theme_minimal()

# Convert the ggplot object to a Plotly object whose tooltip shows only the
# `text` aesthetic.
p_plotly <- ggplotly(p, tooltip = "text")
## `geom_smooth()` using formula = 'y ~ x'
# Display the interactive plot.
p_plotly